S2_MGF Manuscript Figure 5 - hydrocarbon degraders - ranked differential plots

Last updated 4 Dec 2020 by Steve Formel

Description: Is there a relationship between diversity of salt marsh soil microbial communities and oil abundance?

Load libraries

library(cowplot)
library(tidyverse)
library(brms)

Load and Clean data

Here I use a convenient phyloseq function to generate all three metrics.

source("./S2_MGF_load_packages_and_clean_data.R")

p <- plot_richness(fung.2season_with_outliers)

#convert value to Hill numbers

p1 <- p$data
p1$hill_value <- NA

p1$value[which(p1$variable == "Observed")] <- p1$value[which(p1$variable == "Observed")]

p1$value[which(p1$variable == "Shannon")] <- exp(p1$value[which(p1$variable == "Shannon")])

p1$value[which(p1$variable == "Simpson")] <- 1/(1 - p1$value[which(p1$variable == "Simpson")])

p1$hill_order <- NA

p1$hill_order[which(p1$variable == "Observed")] <- 0
p1$hill_order[which(p1$variable == "Shannon")] <- 1
p1$hill_order[which(p1$variable == "Simpson")] <- 2

#rename levels for site
p1$site <- as.factor(p1$site)
levels(p1$site) <- plyr:::revalue(levels(p1$site), c("BJ" = "Heavily Oiled", "F" = "Lightly Oiled"))

#total PAHs----
p1 <- p1 %>%
  filter(variable %in% c("Observed", "Shannon", "Simpson")) %>%
  droplevels()

p.list <- split.data.frame(x = p1, f = p1$variable)

#also for plotting ultimate figure
p.fung <-p.list

Bayesian LM of alpha diversity against total relevant PAHs

based on https://solomonkurz.netlify.app/post/robust-linear-regression-with-the-robust-student-s-t-distribution/ and https://bayesed-and-confused.netlify.app/post/model-fit-checks/

I look at the models as they’re generated and ultimately create a table of beta and R-squared values.

Hill Order = 0

This is the equivalent of richness.

f0 <- brm(value ~ Total.relevant.PAHs, 
          data = p.list[[1]],
          family = "gaussian",
          chains = 4, cores = 4,
          seed = 1)

M <- f0
pp_check(object = M, type = "dens_overlay", nsamples = 100)

pp_check(M, type = "stat", stat = 'median', nsamples = 100)

pp_check(M, type = "stat", stat = 'mean', nsamples = 100)

pp_check(M,type = 'intervals')

plot(M)

summ <- as.data.frame(posterior_summary(M))

#####R2
r2 <- as.data.frame(bayes_R2(object = M,resp = NULL,summary = TRUE,robust = FALSE,probs = c(0.025, 0.975)))

#get parameters for plotting
df.plot <- rbind(summ[2,], r2[1,])
df.plot$hill_order <- "0"
df.plot$param <- rownames(df.plot)

df.plot.done <- df.plot

Hill Order = 1

This is the equivalent of Shannon Diversity.

f1 <- brm(value ~ Total.relevant.PAHs, 
          data = p.list[[2]],
          family = "skew_normal",
          chains = 4, cores = 4,
          seed = 1)

M <- f1
pp_check(object = M, type = "dens_overlay", nsamples = 100)

pp_check(M, type = "stat", stat = 'median', nsamples = 100)

pp_check(M, type = "stat", stat = 'mean', nsamples = 100)

pp_check(M,type = 'intervals')

plot(M)

summ <- as.data.frame(posterior_summary(M))

#R2
r2 <- as.data.frame(bayes_R2(object = M,resp = NULL,summary = TRUE,robust = FALSE,probs = c(0.025, 0.975)))

#get parameters for plotting
df.plot <- rbind(summ[2,], r2[1,])
df.plot$hill_order <- "1"
df.plot$param <- rownames(df.plot)

df.plot.done <- rbind(df.plot.done, df.plot)

Hill Order = 2

This is the equivalent of Simposon Diversity.

f2 <- brm(value ~ Total.relevant.PAHs, 
                data = p.list[[3]],
                family = "skew_normal",
                chains = 4, cores = 4,
                seed = 1,
          control = list(adapt_delta = 0.99))

M <- f2
pp_check(object = M, type = "dens_overlay", nsamples = 100)

pp_check(M, type = "stat", stat = 'median', nsamples = 100)

pp_check(M, type = "stat", stat = 'mean', nsamples = 100)

pp_check(M,type = 'intervals')

plot(M)

print(M)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: value ~ Total.relevant.PAHs 
##    Data: p.list[[3]] (Number of observations: 82) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept               5.94      0.37     5.27     6.74 1.00     1578     1599
## Total.relevant.PAHs    -0.00      0.00    -0.01     0.00 1.00     1851     1622
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.61      0.29     3.11     4.25 1.00     1460     1787
## alpha     8.20      2.42     3.99    13.37 1.00     1863     1738
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summ <- as.data.frame(posterior_summary(M))

#R2
r2 <- as.data.frame(bayes_R2(object = M,resp = NULL,summary = TRUE,robust = FALSE,probs = c(0.025, 0.975)))

#get parameters for plotting
df.plot <- rbind(summ[2,], r2[1,])
df.plot$hill_order <- "2"
df.plot$param <- rownames(df.plot)

Look at table

df.plot.done <- rbind(df.plot.done, df.plot)
df.plot.done$microbe <- "Fungi"
fungi.plot.done <- df.plot.done

plot.params.df <- fungi.plot.done

plot.params.df
##                            Estimate   Est.Error          Q2.5       Q97.5
## b_Total.relevant.PAHs  -0.009470966 0.020140572 -4.844743e-02 0.029914825
## R2                      0.014453834 0.018587004  1.856109e-05 0.067263744
## b_Total.relevant.PAHs1 -0.003995385 0.006477364 -1.929337e-02 0.005685893
## R21                     0.016623170 0.027595167  1.164506e-05 0.101515224
## b_Total.relevant.PAHs2 -0.002493690 0.004200763 -1.264174e-02 0.003404719
## R22                     0.015367593 0.026486141  7.171018e-06 0.097598599
##                        hill_order                 param microbe
## b_Total.relevant.PAHs           0 b_Total.relevant.PAHs   Fungi
## R2                              0                    R2   Fungi
## b_Total.relevant.PAHs1          1 b_Total.relevant.PAHs   Fungi
## R21                             1                    R2   Fungi
## b_Total.relevant.PAHs2          2 b_Total.relevant.PAHs   Fungi
## R22                             2                    R2   Fungi

Write table

write.csv(plot.params.df, "../../../../results/images/manuscript/S2_MGF_final/S2_MGF_oil_div_results.csv")